from datasets import load_dataset, Audio

# Local directory that holds the dataset files (double-check that this path
# exists on the machine running the script).
data_dir = "/media/byy/data5/why/AI智能化/10voice/kaggle-1/voxpopuli-data/nl"

# Load the "train" split of the "nl" configuration, handing the loader the
# local directory above through its `data_dir` keyword.
# NOTE(review): the intent is to load without touching the network, but nothing
# in this call enforces that -- presumably offline mode has to be enabled
# outside this call (e.g. via the HF_DATASETS_OFFLINE environment variable);
# confirm against the installed `datasets` version.
dataset = load_dataset(
    path="facebook/voxpopuli",
    name="nl",
    data_dir=data_dir,
    split="train",
    keep_in_memory=False,  # do not copy the whole dataset into RAM
)
